home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Precision Software Appli…tions Silver Collection 1
/
Precision Software Applications Silver Collection Volume One (PSM) (1993).iso
/
tutor
/
asm1tut.exe
/
MISHMASH.DOC
< prev
next >
Wrap
Text File
|
1990-08-06
|
31KB
|
842 lines
The PC Assembler Tutor - Copyright (C) 1990 Chuck Nelson
______________________
MISHMASH
This document contains several assembler programs. It has no page breaks and
no footnotes so you can cut the programs directly out of the text with a word
processor.
BLOCK MOVE
The first subroutine does a block move from one place in memory to another. It
is designed so the source block and the target block can be overlapping. It
first calculates the total address of the source block and the target block. If
the source block is below the target block the move starts at the top of the
source block and moves down. If the source block is above the target block the
move starts at the bottom of the source block and moves up. This makes sure
that overlapping data will not be clobbered.
This calculates the full 20 bit address. It was designed for BASIC; BASIC
sometimes requires the full 20 bit address. For many languages, all you need
to do is look at the offset addresses since segments cannot overlap. This is
NOT true of something called the "HUGE" mode, where you need to calculate the
full 20 bit address.
+++++++++++++++++++++ << START OF PROGRAM >> ++++++++++++++++++++++
include /pushregs.mac
_TEXT SEGMENT PUBLIC 'CODE'
ASSUME cs:_TEXT
PUBLIC BlockMove
; - - - - - - - - - -
; BlockMove ( from.seg, from.off, to.seg, to.off, byte.count)
; for BASIC
; MOVSW is from DS:[SI] to ES:[DI]
FROM_SEG_ADDRESS EQU [bp+14]
FROM_OFFSET_ADDRESS EQU [bp+12]
TO_SEG_ADDRESS EQU [bp+10]
TO_OFFSET_ADDRESS EQU [bp+8]
BYTE_COUNT_ADDRESS EQU [bp+6]
; - - - - - - - - - -
; ---------------------------------------------------------------------
; BlockMove - copy byte.count bytes between two blocks that may overlap.
;
; Far procedure. Each of the five argument slots holds the OFFSET (in DS)
; of a word variable; the routine reads the value through that offset and
; removes the five slots itself on return ("ret 10").
;   FROM_SEG_ADDRESS     -> word holding the source segment
;   FROM_OFFSET_ADDRESS  -> word holding the source offset
;   TO_SEG_ADDRESS       -> word holding the target segment
;   TO_OFFSET_ADDRESS    -> word holding the target offset
;   BYTE_COUNT_ADDRESS   -> word holding the number of bytes to move
;
; The 20-bit start addresses (segment * 16 + offset) are compared to pick
; the copy direction:
;   FROM above TO  -> copy upwards from the first byte   (DF = 0)
;   FROM below TO  -> copy downwards from the last byte  (DF = 1)
;   FROM equals TO -> nothing to do
; The bulk is moved a word at a time; an odd count moves one extra byte.
;
; Registers are saved/restored with the PUSHREGS/POPREGS macros (from
; pushregs.mac, not shown here). The flags are saved with pushf/popf so
; that a downward copy does not return to the caller with DF still set.
; ---------------------------------------------------------------------
BlockMove proc far
        push    bp
        mov     bp, sp
        pushf                           ; keep caller's flags (DF is changed below)
        PUSHREGS ax, bx, cx, dx, si, di, es, ds

        ; AX:BX is the total FROM address
        ; DX:DI is the total TO address
        ;     (FROM address > TO address) -> upwards
        ;     (FROM address < TO address) -> downwards

        ; calculate 20 bit total address
        sub     ax, ax                  ; zero AX (high part of FROM)
        mov     si, FROM_SEG_ADDRESS
        mov     bx, [si]                ; from_seg to BX
        sub     dx, dx                  ; zero DX (high part of TO)
        mov     si, TO_SEG_ADDRESS
        mov     di, [si]                ; to_seg to DI
        mov     cx, 4                   ; shift 4 bits (segment * 16)
shift_loop:
        shl     bx, 1
        rcl     ax, 1                   ; carry from BX -> AX
        shl     di, 1
        rcl     dx, 1                   ; carry from DI -> DX
        loop    shift_loop

        ; AX:BX and DX:DI now contain the total address of the
        ; segment start. Now add the offsets.
        mov     si, FROM_OFFSET_ADDRESS
        add     bx, [si]
        adc     ax, 0
        mov     si, TO_OFFSET_ADDRESS
        add     di, [si]
        adc     dx, 0

        ; AX:BX and DX:DI are now the total addresses of the first
        ; byte to be moved. Compare the high parts first; only if they
        ; are equal do the low parts decide. If both parts are equal the
        ; block is being moved onto itself, so there is no work to do.
        cmp     ax, dx
        ja      bottom_to_top           ; FROM is higher
        jb      top_to_bottom           ; TO is higher
        cmp     bx, di
        ja      bottom_to_top           ; FROM is higher
        jb      top_to_bottom           ; TO is higher
        jmp     exit

bottom_to_top:
        ; Every argument is fetched through DS, so DS is loaded with the
        ; source segment last.
        mov     si, TO_SEG_ADDRESS
        mov     es, [si]                ; to_seg to ES
        mov     si, TO_OFFSET_ADDRESS
        mov     di, [si]                ; to_offset to DI
        mov     si, BYTE_COUNT_ADDRESS
        mov     cx, [si]                ; byte count to CX
        mov     si, FROM_SEG_ADDRESS
        mov     ax, [si]                ; temporary storage for new DS
        mov     si, FROM_OFFSET_ADDRESS
        mov     si, [si]                ; from_offset to SI
        mov     ds, ax                  ; now move from_seg to DS

        sub     bx, bx                  ; clear BX
        shr     cx, 1                   ; CX = word count, odd byte in CF
        rcl     bx, 1                   ; BX = 1 if one byte is left over
        cld                             ; clear DF (go up)
        rep     movsw                   ; the block move (count in CX)
        and     bx, bx                  ; one extra byte?
        jz      exit
        movsb                           ; SI/DI already point at the last byte
        jmp     exit

top_to_bottom:
        mov     si, TO_SEG_ADDRESS
        mov     es, [si]                ; to_seg to ES
        mov     si, TO_OFFSET_ADDRESS
        mov     di, [si]                ; to_offset to DI
        mov     si, BYTE_COUNT_ADDRESS
        mov     cx, [si]                ; byte count to CX
        mov     si, FROM_SEG_ADDRESS
        mov     ax, [si]                ; temporary storage for new DS
        mov     si, FROM_OFFSET_ADDRESS
        mov     si, [si]                ; from_offset to SI
        mov     ds, ax                  ; now move from_seg to DS

        add     si, cx                  ; one past the end of the source
        sub     si, 2                   ; back up to the last whole word
        add     di, cx                  ; one past the end of the target
        sub     di, 2                   ; back up to the last whole word
        sub     bx, bx                  ; clear BX
        shr     cx, 1                   ; CX = word count, odd byte in CF
        rcl     bx, 1                   ; BX = 1 if one byte is left over
        std                             ; set DF (go down)
        rep     movsw                   ; the block move (count in CX)
        and     bx, bx                  ; one extra byte?
        jz      exit
        ; After the word moves SI/DI sit one byte below the first byte of
        ; the block; step up one byte so movsb copies that first byte.
        inc     si
        inc     di
        movsb                           ; move one last byte

exit:
        POPREGS ax, bx, cx, dx, si, di, es, ds
        popf                            ; restore caller's flags (undoes the std above)
        mov     sp, bp
        pop     bp
        ret     (10)                    ; remove the five word arguments
BlockMove endp
; - - - - - - - - - -
_TEXT ENDS
END
++++++++++++++++++++++ << END OF PROGRAM >> +++++++++++++++++++++++
MULTIPLICATION AND DIVISION
The following are routines for multiple word multiplication and division. They
are the core routines. There must be an intermediate routine which prepares
the information correctly for the core routine and then calls the core
routine. Among other things, these intermediate routines must:
1) deal with signed numbers. They must convert any negative numbers into
positive numbers and keep track of the signs. Then they must alter the
signs of the results if necessary.
2) make copies of numbers for the core routine when the core routine will
destroy or alter the number during the calculation.
3) make decisions about valid results for the multiplication routines. If
we multiply two numbers of length N words, then the result can be N + N
words long. What do you want to do if the result is over N words long? It
is your decision.
4) transfer the results back to the programs if necessary.
These are the things we did in chapter 16, and they are necessary here as
well. In all the following routines you need to pay attention to the lengths.
Some lengths are in BYTES and some lengths are in WORDS. Make sure you know
which is which.
BLOCK MULTIPLICATION
The first multiplication program uses block multiplication. This is simply the
multiple word multiplication that you did in chapters 13 and 16. This time,
instead of multiplying n X 1 words, we will be multiplying n X n words.
The most important thing that this routine does is minimize its work. If n =
100 words, then it is possible for the routine to do 10,000 multiplications.
This takes a lot of time. If we have two 100 word numbers but the first one is
127,911 and the other one is 4,926,948,187,062 the first number has
significant information in two words and the second number has significant
information in three words. We only need to multiply 3 X 2 = 6 words instead
of 10,000 words. As you can see, this will cut the time by a factor of over
1000. This routine requires that the result be distinct from either the
multiplicand or multiplier and be n + n (2n) words long.
First we clear the area for the result. The next section finds the highest
non-zero word of both the multiplicand and multiplier. If either is 0 the
result is 0, so we exit (the result is cleared and is 0). After that comes the
multiplication proper. We multiply the complete multiplicand by one multiplier
word, then cycle to the next multiplier word and so on. We add each DX:AX pair
to the temporary result and propagate any carry that results from the
addition. The result cannot be larger than N + N words, so we will never
propagate past the result area. This is as fast as you can multiply numbers on
the 8086.
+++++++++++++++++++++ << START OF PROGRAM >> +++++++++++++++++++++++
; block multiplication using standard 8086 multiplication
; block_multiply ( length , multiplicand, multiplier, temp_result )
; length is the number of WORDS
; length is a number, but the others are addresses. The temp_result
; space must be (2 X length) WORDS, and must be distinct from the other
; variables since it will be overwritten by the routine. This is
; a far routine for C, and after setting up BP, we have:
;
; TEMP_RESULT_ADDRESS EQU [bp + 12]
; MULTIPLIER_ADDRESS EQU [bp + 10]
; MULTIPLICAND_ADDRESS EQU [bp + 8]
; DATA_LENGTH EQU [bp + 6]
INCLUDE \pushregs.mac
; - - - - - - - - - - - - - - - - - - - -
; Scratch variables for block_multiply. This is static storage that the
; routine writes on every call.
DATASTUFF SEGMENT PUBLIC 'DATA'
; offset of the highest non-zero word of the multiplicand
multiplicand_top_address dw ?
; offset of the highest non-zero word of the multiplier
multiplier_top_address dw ?
; offset in temp_result where the partial products of the current
; multiplier word start; moves up one word per multiplier word
temp_bottom_address dw ?
; offset of the multiplier word currently being used
current_multiplier_address dw ?
DATASTUFF ENDS
; - - - - - - - - - - - - - - - - - - - -
CODESTUFF SEGMENT PUBLIC 'CODE'
PUBLIC block_multiply
ASSUME CS:CODESTUFF, DS:DATASTUFF
TEMP_RESULT_ADDRESS EQU [bp + 12]
MULTIPLIER_ADDRESS EQU [bp + 10]
MULTIPLICAND_ADDRESS EQU [bp + 8]
DATA_LENGTH EQU [bp + 6]
; - - - - - - - - - -
; ---------------------------------------------------------------------
; block_multiply ( length, multiplicand, multiplier, temp_result )
;
; Far procedure; arguments are left on the stack for the caller to remove.
;   DATA_LENGTH           number of WORDS in each operand
;   MULTIPLICAND_ADDRESS  offset of the multiplicand (read only)
;   MULTIPLIER_ADDRESS    offset of the multiplier   (read only)
;   TEMP_RESULT_ADDRESS   offset of a 2 x length word area for the product
;
; The product area is zeroed first. Only the words up to the highest
; non-zero word of each operand are multiplied; if either operand is all
; zero the routine exits with the zeroed product area.
; Flags (including DF) are saved with pushf/popf; the listed registers are
; saved with the PUSHREGS/POPREGS macros (from pushregs.mac, not shown).
; ---------------------------------------------------------------------
block_multiply proc far
        push    bp
        mov     bp, sp
        pushf                           ; DF is changed below
        PUSHREGS ax, bx, cx, dx, si, di, es

        mov     ax, ds                  ; scasw/stosw work through ES:DI,
        mov     es, ax                  ; so make ES the same as DS

        ; ----- zero the product area -----
        mov     cx, DATA_LENGTH
        add     cx, cx                  ; 2 x length words to clear
        mov     di, TEMP_RESULT_ADDRESS
        xor     ax, ax                  ; AX = 0: fill value now, scan value later
        cld                             ; fill upwards
        rep     stosw

        ; ----- highest non-zero multiplicand word -----
        mov     dx, DATA_LENGTH         ; DX = length in words (reused below)
        mov     bx, dx
        dec     bx                      ; first word is at offset 0
        shl     bx, 1                   ; BX = byte offset of top word (reused below)
        mov     cx, dx                  ; scan at most length words
        mov     di, MULTIPLICAND_ADDRESS
        add     di, bx                  ; DI -> top word
        std                             ; scan downwards
        repe    scasw                   ; skip words equal to AX (= 0)
        jne     mcand_top_found
zero_operand:
        jmp     mult_done               ; an operand is 0, product stays 0
mcand_top_found:
        lea     di, [di+2]              ; scasw stepped one word too far
        mov     multiplicand_top_address, di

        ; ----- highest non-zero multiplier word (AX, BX, DX unchanged) -----
        mov     cx, dx
        mov     di, MULTIPLIER_ADDRESS
        add     di, bx                  ; DI -> top word
        repe    scasw
        je      zero_operand            ; every word was 0
        lea     di, [di+2]              ; scasw stepped one word too far
        mov     multiplier_top_address, di

        ; ----- the multiplication -----
        mov     si, MULTIPLIER_ADDRESS
        mov     current_multiplier_address, si
        mov     ax, TEMP_RESULT_ADDRESS
        mov     temp_bottom_address, ax ; first row starts at the bottom

per_multiplier_word:
        mov     bx, temp_bottom_address ; BX -> product word for this column
        mov     di, MULTIPLICAND_ADDRESS
        mov     cx, [si]                ; CX = current multiplier word

per_multiplicand_word:
        mov     ax, [di]
        mul     cx                      ; DX:AX = multiplicand word x multiplier word
        add     [bx], ax                ; low word into the product
        adc     [bx+2], dx              ; high word plus carry
        jnc     column_done
        ; carry out of [bx+2]: add 1 to successive higher words until
        ; one of them does not overflow
        mov     si, 2
ripple_carry:
        inc     si
        inc     si                      ; first pass: SI = 4
        add     WORD PTR [bx+si], 1
        jc      ripple_carry

column_done:
        add     bx, 2                   ; next product word
        add     di, 2                   ; next multiplicand word
        cmp     di, multiplicand_top_address
        jbe     per_multiplicand_word   ; not past the top word yet

        ; this row is complete; advance to the next multiplier word
        mov     si, current_multiplier_address
        add     si, 2
        cmp     si, multiplier_top_address
        ja      mult_done               ; past the top word: finished
        mov     current_multiplier_address, si
        add     temp_bottom_address, 2  ; next row starts one word higher
        jmp     per_multiplier_word

mult_done:
        POPREGS ax, bx, cx, dx, si, di, es
        popf                            ; restore DF value
        mov     sp, bp
        pop     bp
        ret                             ; a C return, so don't pop arguments.
block_multiply endp
; - - - - - - - - - -
CODESTUFF ENDS
END
++++++++++++++++++++++ << END OF PROGRAM >> +++++++++++++++++++++++
If you understand all of this you can go on. The next one is even more
difficult.
BINARY MULTIPLICATION
This is how the 8086 does multiplication internally. It is a series of shifts
and additions. We can do the same thing with base 10 numbers.
24763 X 275
24763 ---
24763 |
24763 5
24763 |
24763 ---
247630 ___
247630 |
247630 |
247630 7
247630 |
247630 |
247630 ---
2476300 ---
2476300 2
6,809,825
In the base 10 system this is tedious. In the base 2 system this works well.
You either do NO addition or you do 1 addition. We start at the bottom and add
(either once or not at all), then shift the whole number left one bit. We
repeat this cycle till we are finished with the whole multiplier. Once again,
the pivotal operation is finding the highest non-zero word before starting.
This is about 5 times slower than the first method. The only reason that it is
here is to prepare you for the binary division routine.
We need to reserve an extra word above the multiplicand. If the multiplicand
is 6 words long, we need 7 words for the multiplicand. The 6th word will shift
into that 7th word 1 bit at a time. At the end of our 16 bit cycle, all words
will have shifted up one word.
As the multiplication progresses, the bottom words of the multiplicand will be
0 so we don't bother to add these 0 words.
We load the multiplier into DX one word at a time. We then check this word one
bit at a time. If the bit is 1 we add, if the bit is 0 we do nothing. We shift
the multiplicand left 1 bit each time, whether we add or not.
+++++++++++++++++++++ << START OF PROGRAM >> ++++++++++++++++++++++
; binary multiplication using shifts and addition
; binary_multiply ( length , multiplicand, multiplier, temp_result )
; length is the number of WORDS
; length is a number, but the others are addresses. The temp_result
; space and the multiplicand space must be ((2 X length)+1) WORDS,
; and must be distinct from the calling variables since they will be
; overwritten by the routine. This is a far routine for C, and after
; setting up BP, we have:
; TEMP_RESULT_ADDRESS EQU [bp + 12]
; MULTIPLIER_ADDRESS EQU [bp + 10]
; MULTIPLICAND_ADDRESS EQU [bp + 8]
; DATA_LENGTH EQU [bp + 6]
include \pushregs.mac
; - - - - - - - - - - - - - - - - - - - -
; Scratch variables for binary_multiply. This is static storage that the
; routine writes on every call.
DATASTUFF SEGMENT PUBLIC 'DATA'
; length in WORDS of the multiplicand window: the words up to the highest
; non-zero word plus one extra word for the bit shift
multiplicand_length dw ?
; length in BYTES of the multiplier up to its highest non-zero word
multiplier_length dw ?
; BYTE offset of the lowest word that currently takes part in the add and
; the shift; starts at 0 and goes up by 2 after each multiplier word
lowest_non_zero_word dw ?
DATASTUFF ENDS
; - - - - - - - - - - - - - - - - - - - -
CODESTUFF SEGMENT PUBLIC 'CODE'
PUBLIC binary_multiply
ASSUME cs:CODESTUFF, ds:DATASTUFF
TEMP_RESULT_ADDRESS EQU [bp + 12]
MULTIPLIER_ADDRESS EQU [bp + 10]
MULTIPLICAND_ADDRESS EQU [bp + 8]
DATA_LENGTH EQU [bp + 6]
; - - - - - - - - - -
; ---------------------------------------------------------------------
; binary_multiply ( length, multiplicand, multiplier, temp_result )
;
; Shift-and-add multiplication. Far procedure; arguments are left on the
; stack for the caller to remove.
;   DATA_LENGTH           number of WORDS in each operand
;   MULTIPLICAND_ADDRESS  offset of the multiplicand; it is shifted left
;                         in place, so it is destroyed
;   MULTIPLIER_ADDRESS    offset of the multiplier (read only)
;   TEMP_RESULT_ADDRESS   offset of the product area; its first
;                         2 x length words are zeroed on entry
;
; If either operand is all zero the routine exits with the zeroed area.
; Flags (including DF) are saved with pushf/popf; the listed registers are
; saved with the PUSHREGS/POPREGS macros (from pushregs.mac, not shown).
; ---------------------------------------------------------------------
binary_multiply proc far
        push    bp
        mov     bp, sp
        pushf                           ; DF is changed below
        PUSHREGS ax, bx, cx, dx, si, di, es

        mov     ax, ds                  ; scasw/stosw work through ES:DI,
        mov     es, ax                  ; so make ES the same as DS

        ; ----- zero the product area -----
        mov     cx, DATA_LENGTH
        add     cx, cx                  ; 2 x length words to clear
        mov     di, TEMP_RESULT_ADDRESS
        xor     ax, ax                  ; AX = 0: fill value now, scan value later
        cld                             ; fill upwards
        rep     stosw

        ; ----- highest non-zero multiplicand word -----
        mov     dx, DATA_LENGTH         ; DX = length in words (reused below)
        mov     bx, dx
        dec     bx
        shl     bx, 1                   ; BX = byte offset of top word (reused below)
        mov     cx, dx                  ; scan at most length words
        mov     di, MULTIPLICAND_ADDRESS
        add     di, bx                  ; DI -> top word
        std                             ; scan downwards
        repe    scasw                   ; skip words equal to AX (= 0)
        jne     mcand_top_found
zero_operand:
        jmp     mult_done               ; an operand is 0, product stays 0
mcand_top_found:
        ; DI is one word below the word that was found. Turn it into a
        ; word count: +2 for the overshoot, +2 to make a length out of an
        ; offset, +2 for the extra word the bit shift moves into.
        sub     di, MULTIPLICAND_ADDRESS
        add     di, 6
        shr     di, 1                   ; bytes -> words
        mov     multiplicand_length, di ; length in WORDS

        ; ----- highest non-zero multiplier word (AX, BX, DX unchanged) -----
        mov     cx, dx
        mov     di, MULTIPLIER_ADDRESS
        add     di, bx                  ; DI -> top word
        repe    scasw
        je      zero_operand            ; every word was 0
        ; +2 for the overshoot, +2 to make a length out of an offset
        sub     di, MULTIPLIER_ADDRESS
        add     di, 4
        mov     multiplier_length, di   ; length in BYTES

        ; ----- the multiplication -----
        mov     lowest_non_zero_word, 0

next_multiplier_word:
        mov     ax, lowest_non_zero_word ; byte offset of this multiplier word
        cmp     ax, multiplier_length   ; both are in bytes
        jb      have_multiplier_word
        jmp     mult_done               ; all multiplier words used

have_multiplier_word:
        mov     si, MULTIPLIER_ADDRESS
        add     si, ax
        mov     dx, [si]                ; DX = current multiplier word
        mov     di, 16                  ; DI counts the 16 bits of DX

next_bit:
        shr     dx, 1                   ; low bit -> CF
        jnc     shift_multiplicand      ; bit is 0: no addition

        ; bit is 1: product += multiplicand, from the current low word up
        mov     ax, lowest_non_zero_word
        mov     si, MULTIPLICAND_ADDRESS
        add     si, ax                  ; SI -> lowest live multiplicand word
        mov     bx, TEMP_RESULT_ADDRESS
        add     bx, ax                  ; BX -> matching product word
        mov     cx, multiplicand_length ; words to add
        clc
add_word:
        mov     ax, [si]
        adc     [bx], ax
        lea     si, [si+2]              ; lea and dec leave CF alone, so the
        lea     bx, [bx+2]              ; carry chain survives to the next adc
        dec     cx
        jnz     add_word
        adc     WORD PTR [bx], 0        ; one last carry is possible

shift_multiplicand:
        ; shift the live part of the multiplicand one bit to the left
        mov     si, MULTIPLICAND_ADDRESS
        add     si, lowest_non_zero_word
        mov     cx, multiplicand_length ; words to shift
        clc                             ; a 0 enters the lowest bit
shift_word:
        rcl     WORD PTR [si], 1        ; CF carries the top bit upwards
        lea     si, [si+2]              ; CF preserved
        dec     cx                      ; CF preserved
        jnz     shift_word

        dec     di
        jnz     next_bit                ; 16 add-and-shift steps per word

        add     lowest_non_zero_word, 2 ; move up one word
        jmp     next_multiplier_word

mult_done:
        POPREGS ax, bx, cx, dx, si, di, es
        popf                            ; restore DF value
        mov     sp, bp
        pop     bp
        ret                             ; a C return, so don't pop arguments.
binary_multiply endp
; - - - - - - - - - -
CODESTUFF ENDS
END
++++++++++++++++++++++ << END OF PROGRAM >> +++++++++++++++++++++++
BINARY DIVISION
This is by far the hardest to understand. The binary division routine is the
opposite of the multiplication routine. We move the dividend to the remainder
area since it will be modified during the routine. We shift the divisor one
word past the top of the dividend (to make sure that the divisor starts out
larger than the dividend) and then start the shift-subtract cycle. We shift
right 1 bit and then take a look at the two numbers. If the divisor is larger
than the dividend we do nothing and put a 0 bit in the quotient. If the
divisor is smaller than or equal to the dividend, we put a 1 bit in the
quotient and subtract the divisor from the dividend. At the end, what is left
of the dividend is our remainder. As usual, we use only as many words as
necessary, both for the numbers and the individual subtractions.
This is about 5 times slower than the block multiplication. It is possible to
approach the speed of the block multiplication routine by using a block division
routine which guesses and then modifies its guess, but it would be almost
impossible to understand what the code does, so I won't show it to you.
+++++++++++++++++++++ << START OF PROGRAM >> ++++++++++++++++++++++
; binary division using shifts and subtraction
; binary_divide ( length , dividend, divisor, quotient, remainder)
; length is the number of WORDS
; length is a number, but the others are addresses. The divisor and
; remainder space will be overwritten one word past the highest non-
; zero word by the subroutine. The remainder space is cleared one word past
; its length. This is a far routine for C, and after setting up BP, we have:
OUR_DIVIDEND_ADDRESS EQU [bp + 14] ; same as remainder address
REMAINDER_ADDRESS EQU [bp + 14]
QUOTIENT_ADDRESS EQU [bp + 12]
DIVISOR_ADDRESS EQU [bp + 10]
DIVIDEND_ADDRESS EQU [bp + 8]
DATA_LENGTH EQU [bp + 6]
include \pushregs.mac
; - - - - - - - - - - - - - - - - - - - -
; Scratch variables for binary_divide. This is static storage that the
; routine writes on every call.
DATASTUFF SEGMENT PUBLIC 'DATA'
; length in BYTES of the dividend up to its highest non-zero word
dividend_length dw ?
; first the length in BYTES of the divisor up to its highest non-zero word;
; later replaced by that length in WORDS plus one extra word for the shift
divisor_length dw ?
; - - - -
; The four addresses below mark the top and bottom of the divisor words and
; of the dividend words that take part in the current compare and subtract.
; All four move down one word after every 16 shift steps.
top_divisor_address dw ?
bottom_divisor_address dw ?
; starts one word above the highest non-zero dividend word
top_dividend_address dw ?
bottom_dividend_address dw ?
; offset of the quotient word that receives the current 16 bits
current_quotient_address dw ?
; - - - -
; in BYTES: how far the divisor is moved up at the start, then counted
; down by 2 after each 16 shift steps; the division ends when it reaches 0
shift_count dw ?
; one-bit mask, rotated right before each step, that is ORed into the
; current quotient word when a subtraction takes place
quotient_bit dw ?
DATASTUFF ENDS
; - - - - - - - - - - - - - - - - - - - -
CODESTUFF SEGMENT PUBLIC 'CODE'
PUBLIC binary_divide
ASSUME cs:CODESTUFF, ds:DATASTUFF
; - - - - - - - - - -
; ---------------------------------------------------------------------
; binary_divide ( length, dividend, divisor, quotient, remainder )
;
; Shift-and-subtract division. Far procedure; arguments are left on the
; stack for the caller to remove.
;   DATA_LENGTH        number of WORDS in each operand
;   DIVIDEND_ADDRESS   offset of the dividend (only read)
;   DIVISOR_ADDRESS    offset of the divisor; it is moved and shifted in
;                      place, and one word above its top word is written
;   QUOTIENT_ADDRESS   offset of the quotient; length words are zeroed
;   REMAINDER_ADDRESS  offset of the remainder; the dividend is copied
;                      here, one extra zero word is stored above it, and
;                      the division is carried out on this copy
;
; An all-zero divisor executes "int 0". An all-zero dividend, or a dividend
; with fewer significant words than the divisor, exits early with the
; quotient zeroed and the remainder area holding the dividend copy.
; Flags (including DF) are saved with pushf/popf; the listed registers are
; saved with the PUSHREGS/POPREGS macros (from pushregs.mac, not shown).
; ---------------------------------------------------------------------
binary_divide proc far
push bp
mov bp, sp
pushf ; save DF value
PUSHREGS ax, bx, cx, dx, si, di, es
; the string instructions below use ES:DI, so ES must equal DS
push ds ; es = ds
pop es
; clear quotient
mov ax, 0 ; zero for clearing
mov dx, DATA_LENGTH ; store for later
mov cx, dx
mov di, QUOTIENT_ADDRESS
cld ; upwards
rep stosw
; move dividend to remainder area
mov si, DIVIDEND_ADDRESS
mov di, REMAINDER_ADDRESS ; our new dividend area
mov cx, dx ; DATA_LENGTH
rep movsw ; upwards
; DI is now one word past the copy; AX is still 0
mov [di], ax ; extra 0 above dividend space
; find the highest divisor word which is non-zero
; dx still has DATA_LENGTH
mov bx, dx ; dx = DATA_LENGTH
dec bx
shl bx, 1 ; bx = top word (in # of bytes)
mov di, DIVISOR_ADDRESS
mov bottom_divisor_address, di ; save for later
add di, bx ; di = address of top word
mov cx, dx ; cx = length in words
; DF stays set from here until the popf at exit_div
std ; downwards
; ax is still 0
repe scasw ; look for nonzero
jne first_top_found ; left loop because unequal?
; NOTE(review): there is no jump after the int 0. If the interrupt handler
; returns, execution falls through into first_top_found - confirm that the
; handler ends the program.
int 0 ; divisor is 0 so divide error
first_top_found:
add di, 2 ; we went 2 too far
mov top_divisor_address, di ; store for later
sub di, DIVISOR_ADDRESS
add di, 2 ; actual length
mov divisor_length, di ; length in BYTES
; no registers have been modified except di and cx
; use the same ax, bx and dx values as before for dividend.
; find the highest non-zero dividend word
; ax is still 0 (from above)
mov di, OUR_DIVIDEND_ADDRESS
mov bottom_dividend_address, di ; save for later
add di, bx ; di = address of top word
mov cx, dx ; dx = length in words
repe scasw ; downwards
jne second_top_found ; equal on exit?
jmp exit_div ; dividend = 0 so quotient is 0, remainder is 0
second_top_found:
; add 2 for overshoot & 2 for calculating length
; top dividend address is just past top of dividend
add di, 4
mov top_dividend_address, di ; this is correct
sub di, OUR_DIVIDEND_ADDRESS
; dividend_length is stored here but not read again in this routine;
; DI carries the value into the code below
mov dividend_length, di ; length in BYTES
; if dividend length < divisor length, we are done
; (quotient is already 0 and the remainder area holds the dividend)
cmp di, divisor_length
jae shift_divisor
jmp exit_div
shift_divisor:
; figure out shift count.
; change divisor length from bytes to words
; di is still dividend length
mov ax, di ; dividend_length
mov dx, divisor_length
sub ax, dx ; amount of shift
; both bottom pointers move up by the difference of the two lengths
add bottom_divisor_address, ax ; current bottom
add bottom_dividend_address, ax ; current bottom
add ax, 2 ; 2 extra bytes for shift
mov shift_count, ax ; save shift count
shr dx, 1 ; divisor length - BYTES to WORDS
mov cx, dx ; cx is amount of data to shift
inc dx ; one word extra for shift
mov divisor_length, dx ; new divisor_length (WORDS)
; prepare pointers for the shift
; the divisor is copied upwards in memory by shift_count bytes; the copy
; runs from the top word down (DF is set), so an overlapping source word
; is read before it is overwritten
mov si, top_divisor_address
mov di, si ; destination pointer
add di, ax ; add the shift
mov top_divisor_address, di ; new top of divisor
rep movsw ; downwards
; zero bottom of divisor
; DI continues downwards from where the move stopped
mov ax, 0
mov cx, shift_count
shr cx, 1 ; shift count in words
rep stosw
; set up quotient info
mov ax, QUOTIENT_ADDRESS
add ax, shift_count
sub ax, 2 ; address of top word
mov current_quotient_address, ax
; the first "ror" in the loop turns 0001h into 8000h, so each quotient
; word is filled from its highest bit down
mov quotient_bit, 0001h ; bit to rotate
; ***** the division *****************
; outer loop: one pass per quotient word, shift_count / 2 passes in all
division_loop:
cmp shift_count, 0 ; if 0, we are done
ja do_shift_16
jmp exit_div
do_shift_16:
; ++++++++++ SHIFT AND SUBTRACT LOOP ++++++
mov cx, 16
shift_16_loop:
push cx ; save counter
; +++++++++ SHIFT ++++++++++
; shift divisor one bit to the right
ror quotient_bit, 1
mov si, top_divisor_address
mov cx, divisor_length ; length in words
clc ; clear CF
; rcr moves the low bit of each word into the high bit of the word below
shift_1_loop:
rcr WORD PTR [si], 1
dec si ; doesn't affect carry flag
dec si
loop shift_1_loop
; +++++++++ CHECK FOR SKIP SUBTRACTION +++++++
; skip subtraction if dividend < divisor
; compare word by word from the top down; the first unequal pair decides
mov di, top_divisor_address
mov si, top_dividend_address
mov cx, divisor_length
std ; decrement pointers
repe cmpsw ; cmp dividend, divisor
jb skip_subtraction ; dividend < divisor
; +++++++++++++++ SUBTRACTION ++++++++++++++++
; reached when dividend >= divisor (equal included)
; OR 1 into quotient
mov si, current_quotient_address
mov dx, quotient_bit
or [si], dx
; dividend = dividend - divisor, from the bottom word up
mov si, bottom_divisor_address
mov di, bottom_dividend_address
mov cx, divisor_length ; words
clc ; clear CF
subtraction_loop:
mov dx, [si]
sbb [di], dx
; inc and loop do not change CF, so the borrow reaches the next sbb
inc si
inc si
inc di
inc di
loop subtraction_loop
; dividend >= divisor, so we have no final borrow
; +++++++++++ AFTER SUBTRACTION ++++++++++++++++
skip_subtraction:
pop cx
loop shift_16_loop
; reset the pointers and counters for the outer loop
; after 16 one-bit shifts everything moves down by one whole word
sub shift_count, 2
sub top_divisor_address, 2
sub top_dividend_address, 2
sub bottom_divisor_address, 2
sub bottom_dividend_address, 2
sub current_quotient_address, 2
jmp division_loop
; end of the division *************
exit_div:
POPREGS ax, bx, cx, dx, si, di, es
popf ; restore DF value
mov sp, bp
pop bp
ret ; a C return, so don't pop arguments.
binary_divide endp
; - - - - - - - - - -
CODESTUFF ENDS
END
++++++++++++++++++++++ << END OF PROGRAM >> +++++++++++++++++++++++